library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(vroom)
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(mapview)
moves <- vroom("../../data/OneDrive-2021-12-07/moves_monthly2020.csv")
## Rows: 187382 Columns: 29
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (21): safegraph_place_id, location_name, street_address, city, region, ...
## dbl (6): postal_code, raw_visit_counts, raw_visitor_counts, poi_cbg, dista...
## dttm (2): date_range_start, date_range_end
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
phila <- read_sf("phila.geojson")
library(furrr)
## Loading required package: future
numCores <- availableCores() - 1
plan(multiprocess, workers = numCores)
## Warning: Strategy 'multiprocess' is deprecated in future (>= 1.20.0). Instead,
## explicitly specify either 'multisession' or 'multicore'. In the current R
## session, 'multiprocess' equals 'multisession'.
parks <- phila %>%
filter( naics_code %in% c(712190, 713990))
## Should we do further filtration?
parkmoves <- moves %>%
filter(safegraph_place_id %in% as.list(parks$safegraph_place_id))
#tictoc::tic()
timeseries <-
parkmoves %>%
select(safegraph_place_id, date_range_start, date_range_end, visits_by_day) %>%
mutate(date_range_start = as_date(date_range_start),
date_range_end = as_date(date_range_end)) %>%
mutate(visits_by_day = str_remove_all(visits_by_day, pattern = "[\\[\\]]")) %>%
mutate(visits_by_day = str_split(visits_by_day, pattern = ",")) %>%
mutate(visits_by_day = future_map(visits_by_day, function(x){
unlist(x) %>%
as_tibble() %>%
mutate(day = 1:n(),
visits = value)
})) %>%
unnest(cols = c(visits_by_day)) %>%
dplyr::select(-value)
#tictoc::toc()
#timeseries$visits_by_day[1]
#1:n() creates a range
# tic() and toc() are used to record the time
tictoc::tic()
odMatrix <-
parkmoves %>%
select(safegraph_place_id, visitor_home_cbgs) %>%
mutate(visitor_home_cbgs = future_map(visitor_home_cbgs, function(x){
jsonlite::fromJSON(x) %>%
as_tibble()
})) %>%
unnest(visitor_home_cbgs) %>%
pivot_longer(!safegraph_place_id, names_to = "cbg", values_to = "visits") %>%
drop_na(visits)
tictoc::tic()
## Here join command use wrong columns and the way to calculate the distance should be change.
test <- odMatrix %>%
mutate(row_number = row_number()) %>%
group_by(cbg) %>% summarize(fake_distance = mean(row_number)) %>%
ungroup() %>%
left_join(odMatrix, .) %>%
left_join(., parkmoves)
## Joining, by = "cbg"
## Joining, by = "safegraph_place_id"
library(rgeos)
## Loading required package: sp
## rgeos version: 0.5-9, (SVN revision 684)
## GEOS runtime version: 3.9.1-CAPI-1.14.2
## Please note that rgeos will be retired by the end of 2023,
## plan transition to sf functions using GEOS at your earliest convenience.
## GEOS using OverlayNG
## Linking to sp version: 1.4-6
## Polygon checking: TRUE
library(igraph)
##
## Attaching package: 'igraph'
## The following object is masked from 'package:rgeos':
##
## union
## The following objects are masked from 'package:future':
##
## %->%, %<-%
## The following objects are masked from 'package:lubridate':
##
## %--%, union
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:purrr':
##
## compose, simplify
## The following object is masked from 'package:tidyr':
##
## crossing
## The following object is masked from 'package:tibble':
##
## as_data_frame
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(fastshp)
#install.packages("fastshp",,"http://rforge.net/",type="source")
source("https://raw.githubusercontent.com/alexsingleton/Huff-Tools/master/huff-tools.r")
## Please note that rgdal will be retired by the end of 2023,
## plan transition to sf/stars/terra functions using GDAL and PROJ
## at your earliest convenience.
##
## rgdal: version: 1.5-28, (SVN revision 1158)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.2.1, released 2020/12/29
## Path to GDAL shared files: C:/Users/shaun/Documents/R/win-library/4.1/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ runtime: Rel. 7.2.1, January 1st, 2021, [PJ_VERSION: 721]
## Path to PROJ shared files: C:/Users/shaun/Documents/R/win-library/4.1/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.4-6
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading sp or rgdal.
## Overwritten PROJ_LIB was C:/Users/shaun/Documents/R/win-library/4.1/rgdal/proj
##
## Attaching package: 'FNN'
## The following object is masked from 'package:igraph':
##
## knn
##The sources is the basic package for huff model
## The source is very important!!!
#!!if we tried to use temporal model, then the setting of the MODEL,PACKAGES should be changed
#!!if we tried to use multiple predictors instead of just number of visitors, then the package should be changed
huff_results <- huff_basic(test$location_name, test$raw_visit_counts, test$cbg, test$fake_distance)
ggplot()+
geom_point(data = parkmoves %>%
left_join(parks, by = c("safegraph_place_id")),
aes(y = latitude, x = longitude, color = raw_visitor_counts,
fill = raw_visitor_counts, size = raw_visitor_counts),
alpha = 0.5)
ggplot(parkmoves %>%
filter(location_name %in% c("Clark Park", "Strawberry Mansion Playground",
"Cherashore Playground", "Shot Tower Playground",
"Mander Playground", "Schuylkill River Park",
"Penn Treaty Park", "Jose Manuel Collazo Playground",
"Wissahickon Valley Park", "Weccacoe Playground")))+
geom_line(aes(x = date_range_start, y = raw_visit_counts))+
facet_wrap(~location_name, scales = "free")
ggplot(parkmoves)+
geom_point(aes(x = date_range_start, y = raw_visit_counts))
## Can we spatial join the points to a parks shapefile?
## Here parks should be changed to parks and recreation center
parkspoly <- st_read("https://opendata.arcgis.com/datasets/d52445160ab14380a673e5849203eb64_0.geojson") %>%
st_transform(4326)
## Reading layer `PPR_Properties' from data source
## `https://opendata.arcgis.com/datasets/d52445160ab14380a673e5849203eb64_0.geojson'
## using driver `GeoJSON'
## Simple feature collection with 524 features and 25 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -75.28353 ymin: 39.87117 xmax: -74.95865 ymax: 40.13191
## Geodetic CRS: WGS 84
#ggplot()+geom_sf(data=parkspoly)
parks_and_polygon <- st_join(parks %>% st_transform(crs = 4326), parkspoly)
ggplot()+
geom_sf(data = parkspoly, fill = "light green", alpha = 0.4)+
geom_point(data = parks_and_polygon %>%
mutate(joins_municipal = ifelse(is.na(PARENT_NAME) == TRUE,
"No join", "Join")),
aes(y = latitude, x = longitude, color = joins_municipal))
ggplot()+
geom_sf(data = st_union(parkspoly), fill = "light green", alpha = 0.4)+
geom_point(data = parks_and_polygon %>%
filter(is.na(PARENT_NAME) == FALSE),
aes(y = latitude, x = longitude), size = 0.5)
#this can be used to test whether safegraph data is complete or not.
mapView(parks, alpha = 0.6, size = 0.5) + mapView(parkspoly, col.regions = "green")